<?php

/**
 * Crawl Sogou WAP search result pages for a task and submit every result
 * whose snapshot is accepted by codeWords() to $posturl.
 *
 * For each page 1..$tasks['pn'] the result list is fetched, <style>/<script>
 * blocks are stripped, and result fragments are extracted with a regex. Each
 * fragment is skipped when it matches the global $whitelist pattern; otherwise
 * the target URL is taken from its "url=" parameter, the target page is
 * fetched, normalised to UTF-8 when the encoding can be detected, passed
 * through codeWords(), and, if that returns a non-empty value, POSTed to
 * $posturl.
 *
 * Progress is written with echo / print_f().
 *
 * @param array  $tasks Task description. Keys read here: 'pn' (number of
 *                      result pages to walk), 'search' (query text), 'id'
 *                      and 'name' (copied into the submitted record).
 * @param string $host  Host passed to getheader() for the search request.
 *                      NOTE(review): the request URL itself is hard-coded to
 *                      wap.sogou.com and does not use this value; confirm
 *                      whether that is intentional.
 *
 * @return void
 */
function sogouwap($tasks,$host='wap.sogou.com'){

    Global $whitelist; // whitelist, interpolated into a regex below
    Global $posturl;
    Global $filepath;   // switch file (not used inside this function)

    // Walk each search result page.
    for($pn = 0;$pn < $tasks['pn'];$pn++){

        // Request headers for the Sogou result list.
        $header = getheader($host);

        // NOTE(review): $tasks['search'] is concatenated as-is; presumably the
        // caller supplies it URL-safe. Verify before adding encoding here.
        $url="https://wap.sogou.com/web/searchList.jsp?keyword=".$tasks['search']."&p=".($pn+1);

        print_f($url."\n");

        $contents = Get_Web_Contents($url,'GET','','',$header);

        // A response without a body is treated as an empty page.
        $body = isset($contents['Body']) ? $contents['Body'] : '';

        // Collapse to one line so the non-dotall regexes below can span tags.
        $text = str_replace(array("\r\n", "\r", "\n"), "",$body);

        // Strip CSS and JS.
        $text = preg_replace("/<(style|script)(.*?)<\/(style|script)>/i", "", $text);

        // Extract the search result fragments.
        preg_match_all('/(class="site">(.*?)<span class=\'keyword\')|(<a class="resultLink" href="(.*?)<\/a>)/',$text,$content);

        $records = isset($content[0]) ? $content[0] : array();

        // Process each search result.
        foreach($records as $k => $v){

            echo $k,"\t";

            // Skip results whose site/href matches the whitelist.
            preg_match("/(site\">([^\/]+\.)?({$whitelist})<)|(href=\"([^\/]+\.)?({$whitelist})<)/",$v,$whiteHit);

            if(!empty($whiteHit)){
                continue;
            }

            // Pull the target URL out of the fragment's "url=" parameter.
            // preg_match() only fills index 1 on a match, so a fragment
            // without that parameter is skipped instead of fetching "".
            if(!preg_match("/url=(.*?)&/", $v, $urlMatch) || !isset($urlMatch[1])){
                continue;
            }

            $url = htmlspecialchars_decode(urldecode($urlMatch[1]));

            // A URL without a host component cannot yield request headers.
            $targetHost = parse_url($url, PHP_URL_HOST);

            if(!is_string($targetHost) || $targetHost === ''){
                continue;
            }

            $header = getheader($targetHost);

            $finalsnapshot = finalsnapshot($url,'GET','','',$header);

            if(empty($finalsnapshot['Body'])){
                print_f("快照为空\n".$url);
                continue;
            }

            $snapshot = str_replace(array("\r\n", "\r", "\n","\t"), "",$finalsnapshot['Body']);

            // Transcode to UTF-8. mb_detect_encoding() returns false when none
            // of the candidates fit; in that case the text is left untouched
            // rather than handing an invalid encoding to mb_convert_encoding().
            $encoding = mb_detect_encoding(
                $snapshot,
                array("ASCII",'UTF-8',"GB2312","GBK",'BIG5')
            );

            if($encoding !== false){
                $snapshot = mb_convert_encoding($snapshot, 'UTF-8', $encoding);
            }

            // Pages without a <title> are recorded with an empty title.
            preg_match('/<title>(.*?)<\/title>/i',$snapshot,$data);
            $title = isset($data[1]) ? $data[1] : '';

            // Mark up hit words; the keys 'count' and 'snapshot' are read from
            // the result below. An empty result is treated as "no hit".
            $snapshot = codeWords($snapshot);

            if(!empty($snapshot)){
                print_f($title."\n");

                // Fresh record per result so nothing leaks between iterations.
                $insert = array(
                    'tasks_id'   => $tasks['id'],
                    'tasks_name' => $tasks['name'],
                    'search'     => $tasks['search'],
                    'keyword'    => $snapshot['count'],
                    'url'        => $url,
                    'title'      => $title,
                    'pn'         => $pn + 1,   // 1-based page number
                    'po'         => $k + 1,    // 1-based position within the page
                    'status'     => 1,
                    'addtime'    => time(),
                    'snapshot'   => $snapshot['snapshot'],
                );

                // Submit.
                Get_Web_Contents($posturl, 'POST', $insert);
            }else{
                print_f("未命中\n") ;
            }
        }
        echo "\n";
    }
}

?>